#!/usr/bin/python
import fileinput
import io
import os
import re
import sqlite3
import sys


def main(argv):
    """Refresh the ``data`` column of ``pedia`` from the exported HTML pages.

    Reads ``dracondict_1.html`` .. ``dracondict_22.html`` from the current
    working directory and extracts every ``<div class="h3" id=...>`` entry.
    For each entry whose id already exists in the ``pedia`` table of
    ``dpedia.db.sqlite``, the row's ``data`` is overwritten with the text
    captured between the entry's closing ``</span></span>`` and ``</div>``.
    Ids that are not in the table are printed, one per line, and are not
    written to the database. Finally the total number of entries seen
    (updated or not) is printed.

    Args:
        argv: Command-line arguments; currently unused.

    Raises:
        IOError: If one of the HTML files cannot be opened.
        UnicodeDecodeError: If an HTML file is not valid UTF-8.
        sqlite3.Error: If the database cannot be queried or updated.
    """
    DB_FILE = 'dpedia.db.sqlite'
    HTML_FILE_PREFIX = 'dracondict_%d.html'
    HTML_FILE_COUNT = 22
    count = 0

    rawstr = r"""<div class="h3" id="(.*?)" title=".*?">.*?</span></span>(.*?)</div>"""
    compile_obj = re.compile(rawstr, re.DOTALL | re.UNICODE)

    conn = sqlite3.connect(DB_FILE)
    try:
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        # Load the known ids once into a set so each membership test is O(1)
        # instead of a scan over every row for every HTML entry.
        cur.execute('select id from pedia')
        known_ids = set(row['id'] for row in cur.fetchall())

        for file_i in range(1, HTML_FILE_COUNT + 1):
            # Decode once at the I/O boundary: sqlite3 hands back text ids,
            # so comparing them with undecoded bytes would never match a
            # non-ASCII id. newline='' keeps line endings untranslated so the
            # stored data mirrors the file contents.
            with io.open(HTML_FILE_PREFIX % file_i,
                         encoding='utf-8', newline='') as f:
                buff = f.read()

            for entry_id, data in compile_obj.findall(buff):
                if entry_id in known_ids:
                    cur.execute("update pedia set data=? where id=?",
                                (data, entry_id))
                else:
                    # On Python 2 the id is a ``unicode`` object; encode it
                    # so that printing to a pipe emits UTF-8 bytes rather
                    # than failing in the implicit ASCII encoder.
                    if not isinstance(entry_id, str):
                        entry_id = entry_id.encode('utf-8')
                    print(entry_id)
                count += 1

        conn.commit()
        cur.close()
    finally:
        # Release the database handle even when a file is missing or an
        # update fails; uncommitted changes are discarded in that case.
        conn.close()
    print(count)

# Run the import only when executed as a script; main() gets a copy of argv.
if __name__ == '__main__':
    main(list(sys.argv))
